#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @File  : 相关系数法.py
# @Author: dongguangwen
# @Date  : 2025-02-13 22:18
import pandas as pd
from scipy.stats import pearsonr, spearmanr
from sklearn.datasets import load_iris


# 1. Load the dataset (Iris) and wrap its feature matrix in a DataFrame
#    whose columns are named after the features.
iris = load_iris()
data = pd.DataFrame(iris.data, columns=iris.feature_names)

print(data.head(5))

# Both coefficients below are computed on the same pair of columns,
# so the two Series are pulled out once and reused.
sepal_length = data['sepal length (cm)']
sepal_width = data['sepal width (cm)']

# 2. Pearson correlation coefficient
#    The result unpacks into (statistic, p-value). The printed labels read
#    "Pearson correlation coefficient" and "probability of no correlation".
corr = pearsonr(sepal_length, sepal_width)
coefficient, p_value = corr
print(corr, '皮尔逊相关系数:', coefficient, '不相关性概率:', p_value)

# 3. Spearman correlation coefficient
#    Same reporting format as above, with the label
#    "Spearman correlation coefficient".
corr = spearmanr(sepal_length, sepal_width)
coefficient, p_value = corr
print(corr, '斯皮尔曼相关系数:', coefficient, '不相关性概率:', p_value)

# The bare string below appears to be the output captured from one run of
# this script; it is not used by the program.
"""
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0                5.1               3.5                1.4               0.2
1                4.9               3.0                1.4               0.2
2                4.7               3.2                1.3               0.2
3                4.6               3.1                1.5               0.2
4                5.0               3.6                1.4               0.2
PearsonRResult(statistic=np.float64(-0.11756978413300204), pvalue=np.float64(0.151898260711448)) 皮尔逊相关系数: -0.11756978413300204 不相关性概率: 0.151898260711448
SignificanceResult(statistic=np.float64(-0.166777658283235), pvalue=np.float64(0.04136799424884587)) 斯皮尔曼相关系数: -0.166777658283235 不相关性概率: 0.04136799424884587
"""